{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "运行环境：jupyter notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>distance_from_home</th>\n",
       "      <th>distance_from_last_transaction</th>\n",
       "      <th>ratio_to_median_purchase_price</th>\n",
       "      <th>repeat_retailer</th>\n",
       "      <th>used_chip</th>\n",
       "      <th>used_pin_number</th>\n",
       "      <th>online_order</th>\n",
       "      <th>fraud</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>57.877857</td>\n",
       "      <td>0.31114</td>\n",
       "      <td>1.94594</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   distance_from_home  distance_from_last_transaction  \\\n",
       "0           57.877857                         0.31114   \n",
       "\n",
       "   ratio_to_median_purchase_price  repeat_retailer  used_chip  \\\n",
       "0                         1.94594              1.0        1.0   \n",
       "\n",
       "   used_pin_number  online_order  fraud  \n",
       "0              0.0           0.0    0.0  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "data=pd.read_csv('card_transdata.csv')\n",
    "data.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1000000, 8)"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "cols = data.shape[1]\n",
    "X = data.iloc[:, 0:cols - 1]\n",
    "y = data.iloc[:, cols - 1:cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1000000, 7)\n",
      "(1000000, 1)\n"
     ]
    }
   ],
   "source": [
    "print(X.shape)\n",
    "print(y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "#划分固定的训练集和测试集\n",
    "from sklearn.model_selection import train_test_split \n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.01,random_state=123)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(990000, 7)\n",
      "(990000, 1)\n",
      "(10000, 7)\n",
      "(10000, 1)\n"
     ]
    }
   ],
   "source": [
    "#我们用这个固定的训练集去训练模型\n",
    "print(X_train.shape)\n",
    "print(y_train.shape)\n",
    "\n",
    "#测试集检验模型泛化能力\n",
    "print(X_test.shape)\n",
    "print(y_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# X_train.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# X_test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# y_train.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# y_test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
